'''
Created on Apr 18, 2010

@author: oabalbin
'''

import sys
from collections import deque


class parser_rnaseq():
    """Convert a tab-separated expression matrix into long (one value per line) form."""

    def read_rnaseqexp_file(self, inputfile, outfile, protgenes=False):
        """
        Read an expression matrix and write one line per (gene, sample) pair.

        Lines of ``inputfile`` are split on tabs and handled by their first
        column:

        * empty first column           -> line is skipped
        * ``@StartMatrixData``         -> marks the start of the data section
        * ``name``                     -> header row; sample names are taken
                                          from the third column onwards
        * anything else                -> a data row, processed only once both
                                          the start marker and the header row
                                          have been seen (in either order)

        A data row is ``<gene name> TAB <interval> TAB <value per sample>...``.
        For each value a line
        ``<interval> TAB <gene name> TAB <sample> TAB <value>`` is written to
        ``outfile``. Values and sample names are paired positionally; if their
        counts differ the extra entries of the longer one are ignored.

        Parameters:
            inputfile -- iterable of text lines (e.g. an open file).
            outfile   -- object with ``write``/``writelines`` (e.g. an open file).
            protgenes -- when true, rows whose gene name does not start with
                         ``NM_`` or ``NR_`` are dropped.

        Returns None. The sample names from the header row are also printed
        to stdout.
        """
        sample_names = []
        seen_start_marker = False
        seen_header = False

        for line in inputfile:
            # Drop the line terminator, including the '\r' of CRLF files, so
            # it does not end up glued to the last expression value.
            fields = line.rstrip('\r\n').split('\t')
            first = fields[0]

            if first == '':
                continue

            if first == '@StartMatrixData':
                seen_start_marker = True
                continue

            if first == 'name':
                seen_header = True
                sample_names = fields[2:]
                # Parenthesised single-argument form prints identically under
                # Python 2 and Python 3.
                print(sample_names)
                continue

            if not (seen_start_marker and seen_header):
                continue

            # A row without an interval column cannot yield any output;
            # skip it instead of failing on fields[1].
            if len(fields) < 2:
                continue

            gene_name = first
            interval = fields[1]

            if protgenes and not gene_name.startswith(('NM_', 'NR_')):
                continue

            row_prefix = interval + '\t' + gene_name + '\t'
            outfile.writelines(
                row_prefix + sample + '\t' + value + '\n'
                for value, sample in zip(fields[2:], sample_names)
            )


'''
inputfile=open('/exds/users/oabalbin/projects/rnaseq_data/noveltu_expression_data/protein_coding_genes.exprmat.txt')
outputfile=open('/exds/users/oabalbin/projects/rnaseq_data/noveltu_expression_data/protein_coding_genes.exprmat.txt_test','w')
myparse = parser_rnaseq()
myparse.read_rnaseqexp_file(inputfile,outputfile,True)
'''